This document provides model monitoring analysis for the BBS2.0 credit score. The report analyzes model performance stability, feature drift, and business implications by comparing the current production data with the development sample.
Key Information:
# User Configuration - Modify these parameters
# Path to the scored production (monitoring) extract.
data_path <- "../../model_monitoring_out_bbs.csv"
# Path to the scored development/training sample used as the reference.
reference_data_path <- "bbs2_train_set_unity_scored.csv"
# Common configuration
# Column names for the binary outcome and the model score.
target_var <- "target"
pred_var <- "predicted"
# List of features to be analysed
# Bureau-derived model inputs monitored for drift and data quality.
features_to_analyze <- c(
"avg_disbursed_amount_personal_loan",
"max_dpd_m24_personal_loan",
"max_dpd_m3_active",
"max_dpd_m6",
"max_dpd_m6_active",
"max_dpd_m12_personal_loan",
"total_disbursed_amount_credit_card",
"oldest_trade_credit_card_active",
"inq_count_6m_two_wheeler_loan",
"total_disbursed_amount_closed",
"oldest_trade_credit_card",
"max_dpd_m24",
"max_dpd_m6_personal_loan",
"total_disbursed_amount_unsecured_closed",
"inq_count_3m_two_wheeler_loan",
"oldest_trade",
"total_trd_cnt_consumer_loan_active_ratio",
"max_dpd_m3",
"oldest_trade_unsecured",
"pct_accts_delq_30d_ever",
"inq_count_12m_two_wheeler_loan",
"utilization_personal_loan",
"inq_count_ever_two_wheeler_loan",
"max_disbursed_amount_unsecured_closed",
"max_dpd_m36",
"total_trd_cnt_credit_card_active_ratio",
"utilization_unsecured",
"max_disbursed_amount_unsecured",
"max_dpd_m36_personal_loan",
"median_trade_age_secured",
"max_credit_limit",
"cc_ocl_balance"
)
# Load (installing if necessary) every package used by this report.
pacman::p_load(
"tidyverse",
"plotly",
"DT",
"pROC",
"ROCR",
"scales",
"gridExtra",
"reshape2",
"caret",
"e1071",
"kableExtra",
"data.table",
"magrittr",
"patchwork",
"lubridate"
)
# Source helper functions
# Provides check_missing_values(), check_zero_values(), check_outliers(),
# calculate_model_psi(), calculate_psi_group(), calculate_gini(),
# analyze_psi_bins(), plot_psi_analysis(), etc.
# NOTE(review): inferred from call sites below - confirm against the helper file.
source("helper_functions_BBS.R")
# Load data using data.table for better performance
# fread() returns data.tables; later code relies on data.table syntax
# (e.g. the `..features_to_analyze` column selection).
current_data <- fread(data_path, showProgress = FALSE)
reference_data <- fread(reference_data_path, showProgress = FALSE)
# Convert to data.frame for compatibility with other functions
# current_data <- as.data.frame(current_data)
# reference_data <- as.data.frame(reference_data)
# Validate that every configured feature exists in both data sets.
# Fails fast with the offending column names so a typo in
# `features_to_analyze` is caught before any analysis runs.
missing_current <- setdiff(features_to_analyze, names(current_data))
missing_reference <- setdiff(features_to_analyze, names(reference_data))
# Bug fix: the original condition was
#   length(missing_current) | length(missing_reference) > 0
# which relies on elementwise `|` and implicit truthiness of a bare length;
# use explicit scalar comparisons with short-circuit `||` instead.
if (length(missing_current) > 0 || length(missing_reference) > 0) {
  stop(
    "Missing features - current: [",
    paste(missing_current, collapse = ", "),
    "]; reference: [",
    paste(missing_reference, collapse = ", "), "]",
    call. = FALSE
  )
}
# Extract relevant columns
# current_features <- current_data[, ..features_to_analyze]
# reference_features <- reference_data[, ..features_to_analyze]
# Validate target is numeric
# Pull target and score vectors once; these are reused throughout the report.
current_target <- current_data[[target_var]]
reference_target <- reference_data[[target_var]]
# Validate predictions are numeric and finite
reference_predictions <- reference_data[[pred_var]]
current_predictions <- current_data[[pred_var]]
# Headline sample sizes and observed target rates for both datasets.
# NOTE(review): mean(target) as a rate assumes target is 0/1 numeric - confirm upstream.
data.frame(
Metric = c("Data samples", "Data samples", "Target rate", "Target rate"),
Dataset = c("Current", "Reference", "Current", "Reference"),
Value = c(
nrow(current_data),
nrow(reference_data),
sprintf("%.1f%%", mean(current_target, na.rm = TRUE) * 100),
sprintf("%.1f%%", mean(reference_target, na.rm = TRUE) * 100)
)
) %>% datatable()
Data quality is a critical foundation for reliable model monitoring. Poor data quality can lead to misleading conclusions about model performance and stability. This section examines three key aspects of data quality that could impact model performance:
Each analysis includes color-coded highlighting to quickly identify areas of concern:
The Data Shift Summary provides an aggregate quality score that combines these metrics to prioritize variables requiring attention.
Significant difference in missing values across development and validation samples could point towards:
Missing values should be handled appropriately and not always encoded as zeros during development/testing.
# Missing-value percentages per feature, current vs reference.
# check_missing_values() (helper file) returns a length-2 vector:
# element 1 = current %, element 2 = reference %.
missing_pct_pairs <- lapply(features_to_analyze, check_missing_values)
missing_values <- data.frame(
  Feature = features_to_analyze,
  Current_Missing_Pct = vapply(
    missing_pct_pairs, function(p) as.numeric(p[[1]]), numeric(1)
  ),
  Reference_Missing_Pct = vapply(
    missing_pct_pairs, function(p) as.numeric(p[[2]]), numeric(1)
  )
)
# Add highlighting for significant changes in missing values
# Worst current-missing features first; yellow above 5%, red above 20%
# (values are proportions, rendered as percentages).
datatable(
missing_values %>%
arrange(desc(Current_Missing_Pct)),
options = list(pageLength = 10)
) %>%
formatPercentage(columns = c("Current_Missing_Pct", "Reference_Missing_Pct"), digits = 1) %>%
formatStyle(
columns = c("Current_Missing_Pct", "Reference_Missing_Pct"),
backgroundColor = styleInterval(c(0.05, 0.2), c("white", "#fff3cd", "#f8d7da"))
)
Significant difference in percentage of zero-values across development and validation samples could point towards:
# Share of exactly-zero values per feature, current vs reference.
# check_zero_values() (helper file) returns a length-2 vector:
# element 1 = current %, element 2 = reference %.
zero_pct_pairs <- lapply(features_to_analyze, check_zero_values)
zero_values <- data.frame(
  Feature = features_to_analyze,
  Current_Zero_Pct = vapply(
    zero_pct_pairs, function(p) as.numeric(p[[1]]), numeric(1)
  ),
  Reference_Zero_Pct = vapply(
    zero_pct_pairs, function(p) as.numeric(p[[2]]), numeric(1)
  )
)
# Absolute gap between the two shares; drives the table ordering below.
zero_values$diff <- abs(zero_values$Current_Zero_Pct - zero_values$Reference_Zero_Pct)
# Add highlighting for high percentages of zero values
# Largest current-vs-reference gaps first; the gap column is colour-coded
# (yellow above 5 points, red above 20 points).
datatable(
zero_values %>%
arrange(desc(diff)),
options = list(pageLength = 10)
) %>%
formatPercentage(columns = c("Current_Zero_Pct", "Reference_Zero_Pct", "diff"), digits = 2) %>%
formatStyle(
columns = c("diff"),
backgroundColor = styleInterval(c(0.05, 0.2), c("white", "#fff3cd", "#f8d7da"))
)
A higher percentage of outliers typically indicates a skew. Depending on the attribute, this could indicate a shift towards better (or worse) sourcing quality. Here, outliers are detected using the inter-quartile range method, and the percentages of outliers in the current and reference datasets are compared.
# IQR-based outlier percentages per feature, current vs reference.
# check_outliers() (helper file) returns a length-2 vector:
# element 1 = current %, element 2 = reference %.
outlier_pct_pairs <- lapply(features_to_analyze, check_outliers)
outlier_values <- data.frame(
  Feature = features_to_analyze,
  Current_Outlier_Pct = vapply(
    outlier_pct_pairs, function(p) as.numeric(p[[1]]), numeric(1)
  ),
  Reference_Outlier_Pct = vapply(
    outlier_pct_pairs, function(p) as.numeric(p[[2]]), numeric(1)
  )
)
# Absolute gap between the two shares; drives the table ordering below.
outlier_values$diff <- abs(
  outlier_values$Current_Outlier_Pct - outlier_values$Reference_Outlier_Pct
)
# Add highlighting for high percentages of outliers
# Largest current-vs-reference gaps first; gap colour-coded as above.
datatable(
outlier_values %>%
arrange(desc(diff)),
options = list(pageLength = 10)
) %>%
formatPercentage(columns = c("Current_Outlier_Pct", "Reference_Outlier_Pct", "diff"), digits = 2) %>%
formatStyle(
columns = c("diff"),
backgroundColor = styleInterval(c(0.05, 0.2), c("white", "#fff3cd", "#f8d7da"))
)
# Create summary of data quality issues
# Combine the absolute shifts computed in the three sections above.
quality_summary <- data.frame(
Feature = features_to_analyze,
Missing_Value_Change = abs(missing_values$Current_Missing_Pct - missing_values$Reference_Missing_Pct),
Zero_Value_Change = abs(zero_values$Current_Zero_Pct - zero_values$Reference_Zero_Pct),
Outlier_Change = abs(outlier_values$Current_Outlier_Pct - outlier_values$Reference_Outlier_Pct)
)
# Composite score out of 100: each percentage point of missing/outlier
# shift costs one point; zero-value shift is half-weighted. The score can
# go negative for extreme shifts, which still maps to "Poor" below.
quality_summary$Quality_Score <- 100 - (
quality_summary$Missing_Value_Change * 100 +
quality_summary$Zero_Value_Change * 100 * 0.5 +
quality_summary$Outlier_Change * 100
)
# Traffic-light status bands on the composite score.
quality_summary$Quality_Status <- case_when(
quality_summary$Quality_Score >= 90 ~ "Good",
quality_summary$Quality_Score >= 70 ~ "Warning",
TRUE ~ "Poor"
)
# Worst features first; status column colour-coded green/yellow/red.
datatable(
quality_summary %>%
arrange(Quality_Score),
options = list(pageLength = 10)
) %>%
formatPercentage(columns = c(
"Missing_Value_Change",
"Zero_Value_Change",
"Outlier_Change"
), digits = 2) %>%
formatRound(columns = "Quality_Score", digits = 0) %>%
formatStyle("Quality_Status",
backgroundColor = styleEqual(
c("Good", "Warning", "Poor"),
c("#d4edda", "#fff3cd", "#f8d7da")
)
)
Based on shifts in missing, zero and outlier values, a summary score is generated to highlight the variables that are the most affected. This quality score helps prioritize which variables require immediate attention:
Variables with poor quality scores should be investigated to determine:
Note that since during development of the BBS2 model, all missing values were replaced by zeroes, the same has been done here to calculate performance metrics.
# Replace every NA with 0, mirroring the BBS2 development-time imputation
# so that performance metrics are computed on comparable data.
# NOTE(review): this blankets ALL columns, not just model features, and
# will coerce the 0 if any NA-bearing column is non-numeric - confirm
# that only numeric columns contain NAs in these extracts.
current_data[is.na(current_data)] <- 0
reference_data[is.na(reference_data)] <- 0
Population Stability Index (PSI) is a critical metric for monitoring the stability of model inputs and outputs over time. It quantifies the degree of shift between score distributions, helping identify when a model may need recalibration or retraining.
A PSI value below 0.1 generally indicates the model remains stable, while values between 0.1 and 0.25 suggest moderate shifts that warrant monitoring. Values above 0.25 indicate significant population drift that may require model recalibration or retraining.
PSI Interpretation Guide:
# Create data frame for PSI interpretation guide
psi_guide <- data.frame(
PSI_Range = c("< 0.1", "0.1 - 0.25", "> 0.25"),
Interpretation = c("Stable", "Monitor", "Action Required"),
Description = c(
"No significant population shift detected",
"Moderate population shift, requires monitoring",
"Significant population shift, action required"
),
# Status duplicates Interpretation; it is hidden in the rendered table and
# only drives the row colouring via valueColumns below.
Status = c("Stable", "Monitor", "Action Required")
)
# Create formatted datatable
datatable(psi_guide,
options = list(
dom = "t", # Show only the table (no search, pagination)
ordering = FALSE, # Disable ordering
columnDefs = list(list(targets = 3, visible = FALSE)) # Hide Status column
),
rownames = FALSE
) %>%
formatStyle(
columns = c("PSI_Range", "Interpretation", "Description"),
valueColumns = "Status",
backgroundColor = styleEqual(
c("Stable", "Monitor", "Action Required"),
c("#d4edda", "#fff3cd", "#f8d7da")
),
fontWeight = "bold"
)
To assess overall stability of the model, PSI is computed comparing the development and current datasets.
# Calculate model PSI and distribution
# Overall score PSI comparing current vs reference (helper file).
model_psi_results <- calculate_model_psi()
# Create PSI summary table with thresholds based on credit risk best practices
# NOTE(review): this uses a four-tier scheme (0.10 / 0.15 / 0.25) that is
# finer than the three-tier guide table shown above (0.1 / 0.25) - confirm
# the extra 0.15 band is intended.
# Mixing a number and a string in `Value` coerces the column to character.
psi_summary <- data.frame(
Metric = c("Model Score PSI", "PSI Interpretation"),
Value = c(
round(model_psi_results$psi, 3),
case_when(
model_psi_results$psi < 0.10 ~ "No significant change",
model_psi_results$psi < 0.15 ~ "Moderate change - Monitor",
model_psi_results$psi < 0.25 ~ "Significant change - Investigate",
TRUE ~ "Critical change - Action required"
)
)
)
# Display PSI summary with industry-standard color coding
datatable(psi_summary, options = list(dom = "t")) %>%
formatStyle(
"Value",
backgroundColor = styleEqual(
c(
"No significant change",
"Moderate change - Monitor",
"Significant change - Investigate",
"Critical change - Action required"
),
c("#90EE90", "#FFD700", "#FFA07A", "#FF6B6B")
)
)
Additionally, PSI values across score bins are shown below to understand which bins are affected the most. This bin-level analysis helps identify specific segments of the score distribution where shifts are occurring, which is particularly important for understanding the nature of any population drift.
When examining the bin-level PSI contributions, it’s worth looking into:
These patterns can provide insights into whether the drift is due to changes in applicant quality, economic conditions, or potential issues with the model itself.
# Bin-level PSI table: shows where in the score distribution the shift
# occurred. Decile edges come from the reference (development) scores.
# Fix: deduplicate quantile breaks - for zero-inflated score distributions
# duplicated edges would make cut() error out.
bins <- unique(quantile(reference_predictions, seq(0, 1, length.out = 11)))
# Fix: widen the outer edges so current scores that fall outside the
# reference range are still counted. cut() would otherwise return NA for
# them and table() would silently drop them, understating the shift -
# exactly the drift this report is meant to catch.
bins[1] <- -Inf
bins[length(bins)] <- Inf
reference_cuts <- cut(reference_predictions, bins, include.lowest = TRUE)
current_cuts <- cut(current_predictions, bins, include.lowest = TRUE)
reference_cuts <- table(reference_cuts)
current_cuts <- table(current_cuts)
data.table(
  `Score Bins` = names(reference_cuts),
  `Reference` = as.numeric(reference_cuts),
  `Current` = as.numeric(current_cuts)
) %>%
  mutate(
    Reference = `Reference` / sum(`Reference`),
    Current = `Current` / sum(`Current`)
  ) %>%
  # Per-bin PSI contribution: (p_ref - p_cur) * ln(p_ref / p_cur)
  mutate(PSI = (Reference - Current) * log(Reference / Current)) %>%
  # Normalise to each bin's share of total PSI; the column is hidden and
  # only drives the row colouring below.
  mutate(PSI = PSI / sum(PSI)) %>%
  datatable(options = list(columnDefs = list(list(targets = 4, visible = FALSE)))) %>%
  formatPercentage(columns = c("Reference", "Current"), digits = 1) %>%
  formatStyle(
    columns = c("Reference", "Current"),
    valueColumns = "PSI",
    backgroundColor = styleInterval(c(0.05, 0.1), c("white", "#fff3cd", "#f8d7da"))
  )
To further assess model stability across different sub-populations, PSI is computed across different groups. Note that for these computations the earliest available quarter in the current dataset has been used as reference. Blank PSI values would be a result of missing/low data.
# Score PSI over SourceMonth within bureau-depth buckets; per the note
# above, the earliest available quarter in the current data is the
# reference period for these group-level computations.
psi_result <- calculate_psi_group(
data = current_data,
score_col = pred_var,
date_col = "SourceMonth",
group_col = "bureau_buckets"
)
print.psi_analysis(psi_result)
# Same PSI-over-time view, split by LRVS risk segment.
psi_result <- calculate_psi_group(
data = current_data,
score_col = pred_var,
date_col = "SourceMonth",
group_col = "lrvs_risk_segment"
)
print.psi_analysis(psi_result)
# Split by customer vintage bucket.
psi_result <- calculate_psi_group(
data = current_data,
score_col = pred_var,
date_col = "SourceMonth",
group_col = "vintage_buckets"
)
print.psi_analysis(psi_result)
# Split by LRV pincode colour classification.
psi_result <- calculate_psi_group(
data = current_data,
score_col = pred_var,
date_col = "SourceMonth",
group_col = "lrv_pincodeColor"
)
print.psi_analysis(psi_result)
# Split by LRVS risk group.
psi_result <- calculate_psi_group(
data = current_data,
score_col = pred_var,
date_col = "SourceMonth",
group_col = "lrvs_risk_group"
)
print.psi_analysis(psi_result)
The score distribution comparison is a critical component of model monitoring that helps identify shifts in the model’s scoring patterns over time. This analysis provides insights into how the distribution of scores has changed between the reference and current periods, which can indicate potential model drift.
The visualization below presents a comparison through three complementary views:
Density Plot: Shows the overall shape and central tendency of score distributions for both periods. Vertical dashed lines indicate the mean scores, allowing for quick visual comparison of central tendencies.
Q-Q Plot: Compares the quantiles of both distributions against each other. Points falling along the diagonal red line indicate similar distributions, while deviations suggest systematic differences in how scores are distributed.
Statistical Summary: Provides key numerical metrics including mean, median, standard deviation, skewness, and kurtosis. The percentage change highlights the magnitude and direction of shifts in these statistics.
# Calculate statistics
# Distribution summaries for both score vectors; skewness()/kurtosis()
# come from e1071 (loaded via pacman above).
ref_stats <- c(
Mean = mean(reference_predictions),
Median = median(reference_predictions),
SD = sd(reference_predictions),
Skewness = skewness(reference_predictions),
Kurtosis = kurtosis(reference_predictions)
)
curr_stats <- c(
Mean = mean(current_predictions),
Median = median(current_predictions),
SD = sd(current_predictions),
Skewness = skewness(current_predictions),
Kurtosis = kurtosis(current_predictions)
)
# Create data frame for stats
# NOTE(review): Pct_Change divides by the reference value - it yields
# Inf/NaN if a reference statistic is exactly 0.
stats_df <- data.frame(
Statistic = names(ref_stats),
Reference = round(ref_stats, 2),
Current = round(curr_stats, 2),
Difference = round(curr_stats - ref_stats, 2),
Pct_Change = round((curr_stats - ref_stats) / ref_stats * 100, 1)
)
# Long-format frame for the overlaid density plot.
combined_df <- data.frame(
Score = c(reference_predictions, current_predictions),
Type = factor(
c(
rep("Reference", length(reference_predictions)),
rep("Current", length(current_predictions))
),
levels = c("Reference", "Current")
)
)
# Store mean values for reference in the plot
ref_mean <- ref_stats["Mean"]
curr_mean <- curr_stats["Mean"]
# Create density plot
# adjust = 3 applies heavy kernel smoothing; dashed verticals mark the
# two means in the matching series colours.
# NOTE(review): `size` on line geoms is deprecated in ggplot2 >= 3.4
# (use `linewidth`) - left as-is for compatibility with older versions.
p1 <- ggplot(combined_df, aes(x = Score, fill = Type)) +
geom_density(alpha = 0.5, adjust = 3) +
scale_fill_manual(values = c("Current" = "#EC5228", "Reference" = "#3F7D58")) +
geom_vline(xintercept = ref_mean, linetype = "dashed", color = "#3F7D58", size = 0.8) +
geom_vline(xintercept = curr_mean, linetype = "dashed", color = "#EC5228", size = 0.8) +
labs(
title = "Score Distribution Comparison",
x = "Score",
y = "Density"
) +
theme_minimal() +
theme(legend.position = "top", legend.title = element_blank())
# Q-Q style comparison of the two score distributions: a common random
# index subset is drawn from both vectors, each subset is sorted, and the
# order statistics are plotted against each other. Points on the red
# identity line indicate matching distributions.
# Bug fix: the original called sample(1:n, size = 1000), which errors
# whenever fewer than 1000 scores are available (sampling without
# replacement); cap the sample size at the smaller dataset's length.
# NOTE(review): no seed is set, so the point cloud varies between
# renders - set.seed() upstream if reproducibility matters.
n_common <- min(length(current_predictions), length(reference_predictions))
idx <- sample(seq_len(n_common), size = min(1000L, n_common))
p2 <- ggplot(
  data.frame(
    x = sort(reference_predictions[idx]),
    y = sort(current_predictions[idx])
  ),
  aes(x = x, y = y)
) +
  geom_point(color = "#666666", alpha = 0.6) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  labs(
    title = "Q-Q Plot",
    x = "Reference Quantiles",
    y = "Current Quantiles"
  ) +
  theme_minimal()
# Create table for stats
# Render the summary statistics as a grid table so it can sit beside the
# plots in the combined layout below (gridExtra).
p3 <- tableGrob(stats_df, rows = NULL, theme = ttheme_minimal(
core = list(
fg_params = list(hjust = 0, x = 0.1),
bg_params = list(fill = c("white", "grey95"))
),
colhead = list(fg_params = list(fontface = "bold"))
))
# Arrange plots
# Row 1: density plot full width; rows 2-3: Q-Q plot (2/3) + stats table (1/3).
grid.arrange(p1, p2, p3,
layout_matrix = rbind(c(1, 1, 1), c(2, 2, 3), c(2, 2, 3)),
heights = c(1, 0.8, 0.8)
)
The discrimination metrics below provide a comprehensive assessment of the model’s ability to differentiate between good and bad outcomes. Each metric offers a unique perspective on model performance:
AUC (Area Under the ROC Curve): Measures the model’s ability to rank order risk correctly. An AUC of 0.5 indicates random predictions, while 1.0 represents perfect discrimination.
Gini Coefficient: A transformation of AUC (Gini = 2*AUC-1) that ranges from 0 to 1, with higher values indicating better discrimination power. The Gini coefficient is widely used in credit scoring to assess model quality, with values above 0.4 generally considered good for credit risk models.
KS (Kolmogorov-Smirnov) Statistic: Measures the maximum separation between the cumulative distribution functions of scores for good and bad accounts. Higher values indicate better separation between the two populations, with the theoretical maximum being 1.0.
These metrics should be monitored for stability over time. Any significant decrease could indicate model degradation or population drift requiring further investigation.
Note that a significant change in Gini/IV driven by a small population size should be ignored.
# First, create a table showing thresholds for interpreting feature metrics
metrics_guide <- data.frame(
Metric = c("Information Value (IV)", "Correlation", "KS Statistic", "Gini Coefficient"),
Poor = c("<0.02", "<0.1", "<0.1", "<0.2"),
Fair = c("0.02-0.1", "0.1-0.2", "0.1-0.2", "0.2-0.4"),
Good = c("0.1-0.3", "0.2-0.4", "0.2-0.3", "0.4-0.6"),
Excellent = c(">0.3", ">0.4", ">0.3", ">0.6")
)
# Display the metrics guide table
# Each band column gets a fixed background colour (red through green).
datatable(metrics_guide,
options = list(
dom = "t",
ordering = FALSE
),
rownames = FALSE
) %>%
formatStyle(
"Poor",
backgroundColor = "#f8d7da",
fontWeight = "bold"
) %>%
formatStyle(
"Fair",
backgroundColor = "#fff3cd",
fontWeight = "bold"
) %>%
formatStyle(
"Good",
backgroundColor = "#c3e6cb",
fontWeight = "bold"
) %>%
formatStyle(
"Excellent",
backgroundColor = "#d4edda",
fontWeight = "bold"
)
# Discrimination metrics for both datasets.
# AUC via pROC; Gini = 2 * AUC - 1.
roc_obj <- roc(current_target, current_predictions)
auc_value <- round(auc(roc_obj), 3)
gini_value <- 2 * auc_value - 1
roc_obj <- roc(reference_target, reference_predictions)
auc_value_ref <- round(auc(roc_obj), 3)
# Bug fix: the reference Gini was previously computed from the *current*
# AUC (2 * auc_value - 1), so the reference column always mirrored the
# current Gini; use the reference AUC.
gini_value_ref <- 2 * auc_value_ref - 1
# Calculate KS statistic: maximum gap between the TPR and FPR curves (ROCR).
pred <- prediction(current_predictions, current_target)
perf <- performance(pred, "tpr", "fpr")
ks_value <- max(abs(unlist(perf@y.values) - unlist(perf@x.values)))
pred <- prediction(reference_predictions, reference_target)
perf <- performance(pred, "tpr", "fpr")
ks_value_ref <- max(abs(unlist(perf@y.values) - unlist(perf@x.values)))
# Create metrics table with assessment
# Assessment bands apply to the *current* values only; the reference
# column is displayed for comparison without its own assessment.
metrics_table <- data.frame(
Metric = c("AUC", "Gini", "KS Statistic"),
Value_Current = c(auc_value, gini_value, ks_value),
Assessment = c(
case_when(
auc_value >= 0.8 ~ "Excellent",
auc_value >= 0.7 ~ "Good",
auc_value >= 0.6 ~ "Fair",
TRUE ~ "Poor"
),
case_when(
gini_value >= 0.6 ~ "Excellent",
gini_value >= 0.4 ~ "Good",
gini_value >= 0.2 ~ "Fair",
TRUE ~ "Poor"
),
case_when(
ks_value >= 0.5 ~ "Excellent",
ks_value >= 0.3 ~ "Good",
ks_value >= 0.2 ~ "Fair",
TRUE ~ "Poor"
)
),
Value_Reference = c(auc_value_ref, gini_value_ref, ks_value_ref)
)
datatable(metrics_table, options = list(dom = "t")) %>%
formatRound("Value_Current", 3) %>%
formatRound("Value_Reference", 3) %>%
formatStyle(
"Assessment",
backgroundColor = styleEqual(
c("Excellent", "Good", "Fair", "Poor"),
c("#d4edda", "#c3e6cb", "#fff3cd", "#f8d7da")
),
fontWeight = "bold"
)
# Gini over time (SourceMonth) within bureau-depth buckets.
# Consistency fix: use the pred_var/target_var configuration variables
# (as the calculate_psi_group() calls above already do) instead of
# repeating the literal column names "predicted"/"target".
gini_result <- calculate_gini(
  data = current_data,
  score_col = pred_var,
  target_col = target_var,
  date_col = "SourceMonth",
  group_col = "bureau_buckets"
)
print(gini_result)
# Gini over time by LRVS risk segment.
gini_result <- calculate_gini(
  data = current_data,
  score_col = pred_var,
  target_col = target_var,
  date_col = "SourceMonth",
  group_col = "lrvs_risk_segment"
)
print(gini_result)
# Gini over time by LRV pincode colour classification.
gini_result <- calculate_gini(
  data = current_data,
  score_col = pred_var,
  target_col = target_var,
  date_col = "SourceMonth",
  group_col = "lrv_pincodeColor"
)
print(gini_result)
# Gini over time by customer vintage bucket.
gini_result <- calculate_gini(
  data = current_data,
  score_col = pred_var,
  target_col = target_var,
  date_col = "SourceMonth",
  group_col = "vintage_buckets"
)
print(gini_result)
Risk ranking analysis evaluates how well the model separates good and bad accounts across the score distribution. This analysis is critical for assessing model discrimination power and for making informed cutoff decisions.
The following visualizations compare the model’s performance across score deciles between the current and reference periods. These charts help identify shifts in risk distribution and model effectiveness.
# Calculate risk metrics
# Decile-level bad rate / average score / cumulative response for both
# periods (helper file).
risk_metrics <- create_risk_ranking_plot()
# Define a consistent color palette for all charts
# (fixed swapped comments: #ff7f0e is orange, #3F7D58 is green)
current_color <- "#ff7f0e" # Orange for current
reference_color <- "#3F7D58" # Green for reference
# Base plot for bad rates
# Dodged bars comparing bad rate per score decile across the two periods.
p1 <- ggplot(risk_metrics, aes(x = decile, group = period)) +
geom_bar(
aes(y = bad_rate, fill = period),
stat = "identity",
position = position_dodge(width = 0.7),
width = 0.6
) +
scale_fill_manual(
values = c("Current" = current_color, "Reference" = reference_color),
name = "Period"
) +
# Format y-axis as percentage
scale_y_continuous(
labels = scales::percent_format(accuracy = 0.1),
expand = expansion(mult = c(0, 0.1))
) +
labs(
title = "Bad Rate by Decile",
x = "Decile",
y = "Bad Rate"
) +
theme_minimal() +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
axis.text.x = element_text(angle = 0, hjust = 0.5),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "grey80", fill = NA, size = 0.5)
)
# Create a second plot for average scores
# Line + point plot of mean score per decile, solid = current, dashed = reference.
p2 <- ggplot(risk_metrics, aes(x = decile, group = period)) +
geom_line(
aes(y = avg_score, color = period, linetype = period),
size = 1
) +
geom_point(
aes(y = avg_score, color = period),
size = 2
) +
scale_color_manual(
values = c("Current" = current_color, "Reference" = reference_color),
name = "Period"
) +
scale_linetype_manual(
values = c("Current" = "solid", "Reference" = "dashed"),
name = "Period"
) +
# Format y-axis with appropriate decimal places
scale_y_continuous(
labels = scales::number_format(accuracy = 0.01),
expand = expansion(mult = c(0.05, 0.1))
) +
labs(
title = "Average Score by Decile",
x = "Decile",
y = "Average Score"
) +
theme_minimal() +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "grey80", fill = NA, size = 0.5)
)
# Create a third plot for cumulative response
# Cumulative share of bads captured by decile (gains-style curve).
p3 <- ggplot(risk_metrics, aes(x = decile, group = period)) +
geom_line(
aes(y = captured_response, color = period, linetype = period),
size = 1
) +
geom_point(
aes(y = captured_response, color = period),
size = 2
) +
scale_color_manual(
values = c("Current" = current_color, "Reference" = reference_color),
name = "Period"
) +
scale_linetype_manual(
values = c("Current" = "solid", "Reference" = "dashed"),
name = "Period"
) +
# Format y-axis as percentage
scale_y_continuous(
labels = scales::percent_format(accuracy = 1),
expand = expansion(mult = c(0, 0.05))
) +
labs(
title = "Cumulative Response by Decile",
x = "Decile",
y = "Cumulative Response"
) +
theme_minimal() +
theme(
legend.position = "none",
plot.title = element_text(hjust = 0.5, face = "bold", size = 12),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "grey80", fill = NA, size = 0.5)
)
# Combine the three panels side by side (patchwork), shared bottom legend.
p1 + p2 + p3 + plot_layout(guides = "keep") & theme(legend.position = "bottom")
The risk decile analysis provides three key perspectives on model performance:
When reviewing these charts, focus on:
Changes in these patterns may warrant further investigation into specific population segments or model recalibration.
Calibration assesses how well predicted probabilities align with actual outcomes. A well-calibrated model should produce probability estimates that match the observed frequency of events.
The Brier score is a quantitative measure of calibration quality. Lower scores indicate better calibration:
# Create data frame for Brier score interpretation guide
brier_guide <- data.frame(
Brier_Score_Range = c("0.00 - 0.10", "0.10 - 0.20", "0.20 - 0.30", "0.30 - 0.40", "> 0.40"),
Interpretation = c("Excellent", "Good", "Acceptable", "Poor", "Very Poor"),
Description = c(
"Exceptional calibration, predictions closely match actual outcomes",
"Strong calibration, reliable for decision-making",
"Reasonable calibration, but some room for improvement",
"Significant calibration issues, use with caution",
"Major calibration problems, model may need recalibration"
),
# Hidden helper column that drives the row colouring below.
Status = c("Excellent", "Good", "Acceptable", "Poor", "Very Poor")
)
# Create formatted datatable
datatable(brier_guide,
options = list(
dom = "t", # Show only the table (no search, pagination)
ordering = FALSE, # Disable ordering
columnDefs = list(list(targets = 3, visible = FALSE)) # Hide Status column
),
rownames = FALSE
) %>%
formatStyle(
columns = c("Brier_Score_Range", "Interpretation", "Description"),
valueColumns = "Status",
backgroundColor = styleEqual(
c("Excellent", "Good", "Acceptable", "Poor", "Very Poor"),
c("#d4edda", "#c3e6cb", "#fff3cd", "#f8d7da", "#dc3545")
),
fontWeight = "bold"
)
# Calibration curve from the helper file (predicted vs observed rates).
calibration_plot <- create_calibration_plot()
calibration_plot
Note that if a variable could not be discretised, IV will not populate.
# Calculate metrics for current and reference datasets
# Per-feature IV / correlation / KS / Gini from the helper file.
current_metrics <- calculate_performance_metrics(current_data, current_target, features_to_analyze)
reference_metrics <- calculate_performance_metrics(reference_data, reference_target, features_to_analyze)
# Calculate absolute and percentage changes
# NOTE(review): *_Pct_Change divides by the reference value and returns
# Inf/NaN when the reference IV or Gini is 0 - confirm the helper never
# returns exact zeros.
performance_comparison <- current_metrics %>%
rename(
Current_IV = IV,
Current_Correlation = Correlation,
Current_KS = KS_Statistic,
Current_Gini = Gini
) %>%
left_join(
reference_metrics %>%
rename(
Reference_IV = IV,
Reference_Correlation = Correlation,
Reference_KS = KS_Statistic,
Reference_Gini = Gini
),
by = "Feature"
) %>%
mutate(
IV_Change = Current_IV - Reference_IV,
IV_Pct_Change = (Current_IV - Reference_IV) / Reference_IV * 100,
Correlation_Change = Current_Correlation - Reference_Correlation,
KS_Change = Current_KS - Reference_KS,
Gini_Change = Current_Gini - Reference_Gini,
Gini_Pct_Change = (Current_Gini - Reference_Gini) / Reference_Gini * 100
) %>%
arrange(Gini_Pct_Change)
# Display the comparison with highlighting
datatable(
performance_comparison %>%
select(
Feature,
Current_IV, Reference_IV, IV_Pct_Change,
Current_Correlation, Reference_Correlation, Correlation_Change
),
options = list(
pageLength = 10,
buttons = c("copy", "csv", "excel")
),
rownames = FALSE
) %>%
formatRound(columns = 2:7, digits = 3) %>%
# Highlight Correlation changes
formatStyle(
"Correlation_Change",
background = styleInterval(
c(-0.2, -0.1, 0.1, 0.2),
c("#f8d7da", "#fff3cd", "white", "#fff3cd", "#f8d7da")
)
) %>%
# Highlight IV percentage changes (comment fixed: this styles IV, not Gini)
# NOTE(review): IV_Pct_Change is in percent units (25 = 25%), but the
# breakpoints are +/-0.1 and +/-0.2, so almost any nonzero change is
# flagged red - the intended breaks were probably +/-10 and +/-20.
formatStyle(
"IV_Pct_Change",
background = styleInterval(
c(-0.2, -0.1, 0.1, 0.2),
c("#f8d7da", "#fff3cd", "white", "#fff3cd", "#f8d7da")
)
)
# Calculate PSI for each feature
# NOTE(review): assumes calculate_psi(feature_name) returns one numeric
# per feature - confirm against the helper file.
psi_values <- data.frame(
Feature = features_to_analyze,
PSI = sapply(features_to_analyze, calculate_psi)
)
# Add categorization based on PSI thresholds and sort by PSI value
# Same 0.1 / 0.25 bands as the interpretation guide above.
psi_values <- psi_values %>%
mutate(
PSI_Category = case_when(
PSI < 0.1 ~ "Stable",
PSI < 0.25 ~ "Monitor",
TRUE ~ "Action Required"
),
# Reorder factors for proper legend ordering
PSI_Category = factor(PSI_Category, levels = c("Stable", "Monitor", "Action Required"))
) %>%
# Sort by PSI value descending
arrange(desc(PSI))
# Create a new column for ordered feature names
# Factor levels fix the bar order in the chart (largest PSI on top).
psi_values$Feature_Ordered <- factor(psi_values$Feature,
levels = psi_values$Feature[order(psi_values$PSI, decreasing = TRUE)]
)
# Horizontal bar chart of per-feature PSI; features below 0.05 are
# dropped to keep the chart readable.
psi_values %>%
filter(PSI >= 0.05) %>%
# Create a better visualization with ggplot2
ggplot(aes(
x = PSI,
y = Feature_Ordered,
fill = PSI_Category
)) +
# Add horizontal bars
geom_bar(stat = "identity", width = 0.7) +
# Add PSI values as text on the bars
geom_text(aes(label = sprintf("%.3f", PSI)),
hjust = -0.2,
size = 3,
fontface = "bold"
) +
# Set colors based on PSI categories
scale_fill_manual(values = c(
"Stable" = "#81C784", # Green for stable
"Monitor" = "#FFD54F", # Yellow for monitor
"Action Required" = "#E57373" # Red for action required
)) +
# Set x-axis limits to accommodate text labels
scale_x_continuous(limits = c(0, max(psi_values$PSI) * 1.3)) +
# Add labels and title
labs(
title = "Population Stability Index by Feature",
subtitle = "Features with PSI < 0.05 not shown",
x = "PSI Value",
y = NULL,
caption = "PSI < 0.1: Stable | 0.1 - 0.25: Monitor | > 0.25: Action Required"
) +
# Customize theme
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5),
legend.position = "bottom",
legend.title = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank(),
plot.caption = element_text(hjust = 0.5, size = 8)
)
# Full per-feature PSI table with the same traffic-light colouring.
psi_values %>%
mutate(PSI = round(PSI, 3)) %>%
select(Feature, PSI, PSI_Category) %>%
datatable() %>%
formatStyle(
'PSI_Category',
backgroundColor = styleEqual(
c("Stable", "Monitor", "Action Required"),
c('#d4edda', '#fff3cd', '#f8d7da')
),
fontWeight = 'bold'
)
# Per-feature bin-level PSI deep dive, worst-drifting features first.
nms <- psi_values %>%
arrange(desc(PSI)) %>%
pull(Feature)
for (feature_name in nms) {
results <- analyze_psi_bins(
current_data = current_data,
reference_data = reference_data,
score_col = feature_name,
bins = 10, # number of bins
psi_threshold = 0.1, # threshold for overall PSI
bin_psi_threshold = 0.02 # threshold for bin-level shifts
)
# Create visualizations
cat(feature_name)
plot_psi_analysis(results, feature_name)
}
# NOTE(review): ggplot objects are not auto-printed inside a for loop in
# knitr - presumably plot_psi_analysis() prints explicitly; confirm.
utilization_unsecured
utilization_personal_loan
pct_accts_delq_30d_ever
max_dpd_m24
max_dpd_m36
max_dpd_m36_personal_loan
max_disbursed_amount_unsecured_closed
max_dpd_m24_personal_loan
oldest_trade_unsecured
total_disbursed_amount_unsecured_closed
total_trd_cnt_consumer_loan_active_ratio
oldest_trade_credit_card
avg_disbursed_amount_personal_loan
max_disbursed_amount_unsecured
total_disbursed_amount_closed
oldest_trade
max_dpd_m12_personal_loan
inq_count_ever_two_wheeler_loan
median_trade_age_secured
inq_count_12m_two_wheeler_loan
max_dpd_m6
max_dpd_m6_active
max_dpd_m6_personal_loan
oldest_trade_credit_card_active
max_dpd_m3
max_dpd_m3_active
total_trd_cnt_credit_card_active_ratio
max_credit_limit
total_disbursed_amount_credit_card
inq_count_6m_two_wheeler_loan
inq_count_3m_two_wheeler_loan
cc_ocl_balance
# Side-by-side summary statistics (mean, SD, quartiles) for every
# monitored feature in the current and reference samples.
# Improvements: `T` replaced with `TRUE` (T is a reassignable binding),
# and the repeated data.table column subset hoisted out of the ten calls.
current_feats <- current_data[, ..features_to_analyze]
reference_feats <- reference_data[, ..features_to_analyze]
summary_stats <- rbind(
  # Current period stats
  data.frame(
    Feature = features_to_analyze,
    Period = "Current",
    Mean = sapply(current_feats, mean, na.rm = TRUE),
    SD = sapply(current_feats, sd, na.rm = TRUE),
    Q1 = sapply(current_feats, quantile, probs = 0.25, na.rm = TRUE),
    Median = sapply(current_feats, median, na.rm = TRUE),
    Q3 = sapply(current_feats, quantile, probs = 0.75, na.rm = TRUE)
  ),
  # Reference period stats
  data.frame(
    Feature = features_to_analyze,
    Period = "Reference",
    Mean = sapply(reference_feats, mean, na.rm = TRUE),
    SD = sapply(reference_feats, sd, na.rm = TRUE),
    Q1 = sapply(reference_feats, quantile, probs = 0.25, na.rm = TRUE),
    Median = sapply(reference_feats, median, na.rm = TRUE),
    Q3 = sapply(reference_feats, quantile, probs = 0.75, na.rm = TRUE)
  )
)
# Order by Feature and Period so each feature's Current/Reference rows are adjacent
summary_stats <- summary_stats[order(summary_stats$Feature, summary_stats$Period), ]
# Group indicator: 1/2 alternating per feature, repeated for each period pair;
# used below to zebra-stripe the Feature column
summary_stats$Group <- rep(rep(c(1, 2), length.out = length(unique(summary_stats$Feature))), each = 2)
datatable(
summary_stats,
rownames = FALSE,
options = list(
pageLength = 10, # 10 rows per page (5 features x 2 periods)
ordering = FALSE, # Disable sorting to maintain grouping
columnDefs = list(list(targets = 7, visible = FALSE)) # Hide the helper Group column (0-indexed 7 with rownames off)
)
) %>%
formatRound(columns = 3:7, digits = 3) %>%
# Alternate Feature-cell background by the hidden Group column
formatStyle(
"Feature",
valueColumns = "Group",
backgroundColor = styleEqual(c(1, 2), c("#ffffff", "#f9f9f9"))
) %>%
# Lightly shade Current rows to distinguish them from Reference rows
formatStyle(
"Period",
backgroundColor = styleEqual(
c("Current", "Reference"),
c("#f2f2f2", "#ffffff")
)
)
The score distribution by target analysis helps evaluate model discrimination. Key metrics in this analysis include the mean and median score differences between good and bad accounts, the KS statistic, and the overlap coefficient between the two score distributions.
# Build plotting data: score plus labelled target for the current period
score_data <- data.frame(
  Score = current_predictions,
  Target = factor(current_target, levels = c(0, 1), labels = c("Good", "Bad"))
)

# Class-wise score vectors, extracted once and reused throughout
good_scores <- current_predictions[current_target == 0]
bad_scores <- current_predictions[current_target == 1]

# Descriptive statistics per class. unname() strips quantile()'s "25%"/"75%"
# name suffix so the elements are addressable as "Q1"/"Q3" (previously they
# were stored under "Q1.25%"/"Q3.75%", so good_stats["Q1"] returned NA).
good_stats <- c(
  Mean = mean(good_scores),
  Median = median(good_scores),
  SD = sd(good_scores),
  Q1 = unname(quantile(good_scores, 0.25)),
  Q3 = unname(quantile(good_scores, 0.75))
)
bad_stats <- c(
  Mean = mean(bad_scores),
  Median = median(bad_scores),
  SD = sd(bad_scores),
  Q1 = unname(quantile(bad_scores, 0.25)),
  Q3 = unname(quantile(bad_scores, 0.75))
)

# Smoothed densities (adjust = 3 matches the smoothing used in the plot below)
good_density <- density(good_scores, adjust = 3)
bad_density <- density(bad_scores, adjust = 3)

# Evaluate both densities on one common, equally spaced grid
grid <- seq(min(good_density$x, bad_density$x),
  max(good_density$x, bad_density$x),
  length.out = 1000
)
good_dens_interp <- approx(good_density$x, good_density$y, grid, rule = 2)$y
bad_dens_interp <- approx(bad_density$x, bad_density$y, grid, rule = 2)$y

# Overlap coefficient: area under min(density) divided by area of the union
overlap_area <- sum(pmin(good_dens_interp, bad_dens_interp)) * (grid[2] - grid[1])
total_area <- sum(good_dens_interp + bad_dens_interp - pmin(good_dens_interp, bad_dens_interp)) * (grid[2] - grid[1])
overlap_coefficient <- overlap_area / total_area

# Two-sample KS test, computed once and reused (it was previously run twice)
test_results <- ks.test(good_scores, bad_scores)

# Separation metrics between the bad and good score distributions
separation <- data.frame(
  Metric = c("Mean Difference", "Median Difference", "KS Statistic", "Overlap"),
  Value = c(
    bad_stats["Mean"] - good_stats["Mean"],
    bad_stats["Median"] - good_stats["Median"],
    test_results$statistic,
    overlap_coefficient
  )
)
# Create enhanced plot with ggplot2: class-wise score densities with median markers
ggplot(score_data, aes(x = Score)) +
# Dashed vertical reference line at the Good-class median
geom_vline(
xintercept = good_stats["Median"],
color = "#3F7D58",
linetype = "dashed",
size = 0.7
) +
# Dashed vertical reference line at the Bad-class median
geom_vline(
xintercept = bad_stats["Median"],
color = "#EC5228",
linetype = "dashed",
size = 0.7
) +
# Median labels, offset left/right of their lines; vertical position is
# scaled off the pooled score density so the labels sit near the top
annotate("text",
x = good_stats["Median"] - 5,
y = max(density(current_predictions)$y) * 0.9,
label = paste0("Median: ", round(good_stats["Median"], 1)),
color = "#3F7D58",
hjust = 1,
fontface = "bold"
) +
annotate("text",
x = bad_stats["Median"] + 5,
y = max(density(current_predictions)$y) * 0.8,
label = paste0("Median: ", round(bad_stats["Median"], 1)),
color = "#EC5228",
hjust = 0,
fontface = "bold"
) +
# Semi-transparent density areas per class (adjust = 3 matches the smoothing
# used for the overlap-coefficient calculation earlier in the report).
# NOTE(review): ..density.. and size on lines are deprecated in ggplot2 >= 3.4
# (use after_stat(density) and linewidth) — update when bumping ggplot2.
geom_area(
stat = "density",
data = score_data[score_data$Target == "Good", ],
aes(y = ..density..),
fill = "#3F7D58",
alpha = 0.3,
adjust = 3
) +
geom_area(
stat = "density",
data = score_data[score_data$Target == "Bad", ],
aes(y = ..density..),
fill = "#EC5228",
alpha = 0.3,
adjust = 3
) +
# NOTE(review): no layer maps the fill aesthetic (the fills above are literal
# constants), so this scale currently has no effect — confirm before removing.
scale_fill_manual(values = c("Good" = "#3F7D58", "Bad" = "#EC5228")) +
# Add labels and title; subtitle/caption surface the separation metrics
labs(
title = "Score Distribution Across Good and Bad Accounts",
subtitle = paste0(
"Mean Difference: ", round(separation$Value[1], 1),
" | Overlap:", round(overlap_coefficient, 2) * 100, "%"
),
x = "Score",
y = "Density",
caption = paste(
"KS Test (Null = samples from the same distributions)",
" | Test stat:", round(test_results$statistic, 2),
"| P-Value:", round(test_results$p.value, 9)
)
) +
# Apply theme
theme_minimal() +
theme(
plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
plot.subtitle = element_text(hjust = 0.5, size = 10),
legend.position = "top",
legend.title = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
panel.border = element_rect(color = "grey80", fill = NA, size = 0.5)
)
This section analyzes how changes in each input variable affect the model’s predictions, helping understand the directional relationships and their stability over time.
# Bin a variable into (up to) `bins` quantile buckets and summarise the
# model's average prediction within each bucket.
#
# data: data.frame/data.table containing the `variable` and `pred_var` columns
# variable: name of the feature column (string)
# pred_var: name of the prediction column (string)
# bins: target number of quantile bins; duplicated cut points are collapsed,
#   so the realised number can be smaller for low-cardinality variables
#
# Returns a tibble with one row per bin: avg_pred, count, min_val/max_val of
# the variable, a printable bin_label, and pct_population in the bin.
analyze_variable_impact <- function(data, variable, pred_var, bins = 10) {
  # Convert to data.frame so `[[` / `$bin` assignment behave consistently
  df <- as.data.frame(data)
  # Coerce both columns to numeric, then drop rows missing either value
  df[[variable]] <- as.numeric(df[[variable]])
  df[[pred_var]] <- as.numeric(df[[pred_var]])
  df <- df[!is.na(df[[variable]]) & !is.na(df[[pred_var]]), ]
  # Quantile breaks; unique() guards against duplicated cut points for
  # heavily tied variables. cut() still needs >= 2 distinct breaks, so
  # near-constant variables will error here (noted in the report prose).
  breaks <- quantile(df[[variable]], probs = seq(0, 1, length.out = bins + 1), na.rm = TRUE)
  breaks <- unique(breaks)
  df$bin <- cut(df[[variable]], breaks = breaks, include.lowest = TRUE)
  # Calculate statistics by bin. .data[[...]] is the robust tidy-eval idiom
  # for string column names; get() relies on data-mask lookup and can
  # silently pick up same-named objects from the enclosing environment.
  impact_df <- df %>%
    group_by(bin) %>%
    dplyr::summarise(
      avg_pred = mean(.data[[pred_var]], na.rm = TRUE),
      count = n(),
      min_val = min(.data[[variable]], na.rm = TRUE),
      max_val = max(.data[[variable]], na.rm = TRUE)
    ) %>%
    mutate(
      bin_label = sprintf("%.2f - %.2f", min_val, max_val),
      pct_population = count / sum(count) * 100
    )
  return(impact_df)
}
# Plot the binned average prediction for one variable, overlaying the
# current (solid, orange) and reference (dashed, green) periods.
# Point size encodes the share of population in each bin; the subtitle
# reports each period's correlation between the variable and the prediction.
plot_variable_impact <- function(current_impact, reference_impact, variable, cor_current, cor_reference) {
  period_palette <- c("Current" = "#EC5228", "Reference" = "#3F7D58")
  subtitle_text <- sprintf(
    "Correlation with prediction: Current = %.3f, Reference = %.3f",
    cor_current, cor_reference
  )
  ggplot() +
    geom_line(data = current_impact, aes(x = as.numeric(bin), y = avg_pred, color = "Current"), size = 1) +
    geom_point(data = current_impact, aes(x = as.numeric(bin), y = avg_pred, size = pct_population, color = "Current")) +
    geom_line(data = reference_impact, aes(x = as.numeric(bin), y = avg_pred, color = "Reference"), size = 1, linetype = "dashed") +
    geom_point(data = reference_impact, aes(x = as.numeric(bin), y = avg_pred, size = pct_population, color = "Reference")) +
    scale_color_manual(values = period_palette) +
    scale_size_continuous(name = "% Population", range = c(2, 6)) +
    labs(
      title = variable,
      subtitle = subtitle_text,
      x = "Variable Bins (Low to High)",
      y = "Average Prediction",
      color = "Period"
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(face = "bold"),
      legend.position = "top"
    )
}
The plots below show how each feature affects model predictions:
Note - if variables do not have sufficient unique values, charts might not populate.
# Get top features by correlation (absolute correlation of each feature with
# the current-period predictions)
# NOTE(review): feature_cors is not referenced in the code visible below —
# the loop iterates over the full features_to_analyze list. Confirm whether
# a later section uses it before removing.
feature_cors <- sapply(features_to_analyze, function(f) {
abs(cor(current_data[[f]], current_predictions, use = "complete.obs"))
})
# Create one binned-impact plot per feature (current vs reference overlay)
for(feature in features_to_analyze) {
# Calculate binned average prediction per period
current_impact <- analyze_variable_impact(current_data, feature, pred_var)
reference_impact <- analyze_variable_impact(reference_data, feature, pred_var)
# Calculate feature-vs-prediction correlations for the subtitle
cor_current <- cor(current_data[[feature]], current_predictions, use = "complete.obs")
cor_reference <- cor(reference_data[[feature]], reference_predictions, use = "complete.obs")
# print() is required for ggplot objects to render inside a loop
print(plot_variable_impact(current_impact, reference_impact, feature, cor_current, cor_reference))
}
The table below summarizes how variable relationships have changed between periods:
# Compare each feature's correlation with the prediction between periods.
# cor_with_pred: named numeric vector of correlations for one period.
cor_with_pred <- function(df, preds) {
  vapply(
    features_to_analyze,
    function(f) cor(df[[f]], preds, use = "complete.obs"),
    numeric(1)
  )
}
impact_changes <- data.frame(
  Feature = features_to_analyze,
  Current_Correlation = cor_with_pred(current_data, current_predictions),
  Reference_Correlation = cor_with_pred(reference_data, reference_predictions)
) %>%
  mutate(
    # Positive = relationship strengthened (or weakened less) vs reference
    Correlation_Change = Current_Correlation - Reference_Correlation,
    # TRUE when the direction of the relationship flipped between periods
    Direction_Change = sign(Current_Correlation) != sign(Reference_Correlation),
    Impact_Change_Category = case_when(
      abs(Correlation_Change) < 0.1 ~ "Stable",
      abs(Correlation_Change) < 0.2 ~ "Moderate Change",
      TRUE ~ "Significant Change"
    )
  ) %>%
  arrange(desc(abs(Correlation_Change)))
# Display changes: green/amber/red by magnitude of the correlation change,
# and red-flag rows whose correlation flipped sign between periods
datatable(
impact_changes,
options = list(pageLength = 10)
) %>%
formatRound(columns = c("Current_Correlation", "Reference_Correlation", "Correlation_Change"), digits = 3) %>%
# Background by change-magnitude category (Stable / Moderate / Significant)
formatStyle(
"Impact_Change_Category",
backgroundColor = styleEqual(
c("Stable", "Moderate Change", "Significant Change"),
c("#d4edda", "#fff3cd", "#f8d7da")
)
) %>%
# Highlight direction flips in red
formatStyle(
"Direction_Change",
backgroundColor = styleEqual(c(TRUE, FALSE), c("#f8d7da", "white"))
)